{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 7.2 多线程爬虫"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.2.1简单单线程爬虫"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-29T11:29:35.987917Z",
     "start_time": "2018-11-29T11:29:35.983879Z"
    }
   },
   "outputs": [],
   "source": [
    "proxies = {'https': 'https://web-proxy.oa.com:8080',\n",
    "           'http': 'http://web-proxy.oa.com:8080'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import time\n",
    "\n",
    "link_list = []\n",
    "with open('alexa.txt', 'r') as file:\n",
    "    file_list = file.readlines()\n",
    "    for eachone in file_list:\n",
    "        link = eachone.split('\\t')[1]\n",
    "        link = link.replace('\\n','')\n",
    "        link_list.append(link)\n",
    "\n",
    "start = time.time()\n",
    "for eachone in link_list:\n",
    "    try:\n",
    "        r = requests.get(eachone)\n",
    "        print (r.status_code, eachone)\n",
    "    except Exception as e: \n",
    "        print('Error: ', e)\n",
    "end = time.time()\n",
    "print ('串行的总时间为：', end-start)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.2.2 学习Python多线程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-29T11:33:58.186054Z",
     "start_time": "2018-11-29T11:33:58.182015Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Main Finished\n",
      "Thread-1 Thu Nov 29 19:33:59 2018\n",
      "Thread-2 Thu Nov 29 19:34:00 2018\n",
      "Thread-1 Thu Nov 29 19:34:00 2018\n",
      "Thread-1 Thu Nov 29 19:34:01 2018\n",
      "Thread-2 Thu Nov 29 19:34:02 2018\n",
      "Thread-2 Thu Nov 29 19:34:04 2018\n"
     ]
    }
   ],
   "source": [
    "import _thread\n",
    "import time\n",
    " \n",
    "# 为线程定义一个函数\n",
    "def print_time(threadName, delay):\n",
    "    count = 0\n",
    "    while count < 3:\n",
    "        time.sleep(delay)\n",
    "        count += 1\n",
    "        print (threadName, time.ctime())\n",
    "        \n",
    "_thread.start_new_thread(print_time, (\"Thread-1\", 1))\n",
    "_thread.start_new_thread(print_time, (\"Thread-2\", 2))\n",
    "print (\"Main Finished\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-29T11:42:13.596228Z",
     "start_time": "2018-11-29T11:42:07.568910Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting Thread-1Starting Thread-2\n",
      "\n",
      "Thread-1 Thu Nov 29 19:42:08 2018\n",
      "Thread-2 Thu Nov 29 19:42:09 2018\n",
      "Thread-1 Thu Nov 29 19:42:09 2018\n",
      "Thread-1 Thu Nov 29 19:42:10 2018\n",
      "Exiting Thread-1\n",
      "Thread-2 Thu Nov 29 19:42:11 2018\n",
      "Thread-2 Thu Nov 29 19:42:13 2018\n",
      "Exiting Thread-2\n",
      "Exiting Main Thread\n"
     ]
    }
   ],
   "source": [
    "import threading\n",
    "import time\n",
    " \n",
    "class myThread (threading.Thread):\n",
    "    def __init__(self, name, delay):\n",
    "        threading.Thread.__init__(self)\n",
    "        self.name = name\n",
    "        self.delay = delay\n",
    "    def run(self):\n",
    "        print (\"Starting \" + self.name)\n",
    "        print_time(self.name, self.delay)\n",
    "        print (\"Exiting \" + self.name)\n",
    "        \n",
    "def print_time(threadName, delay):\n",
    "    counter = 0\n",
    "    while counter < 3:\n",
    "        time.sleep(delay)\n",
    "        print (threadName, time.ctime())\n",
    "        counter += 1\n",
    "        \n",
    "threads = []\n",
    "\n",
    "# 创建新线程\n",
    "thread1 = myThread(\"Thread-1\", 1)\n",
    "thread2 = myThread(\"Thread-2\", 2)\n",
    " \n",
    "# 开启新线程\n",
    "thread1.start()\n",
    "thread2.start()\n",
    " \n",
    "# 添加线程到线程列表\n",
    "threads.append(thread1)\n",
    "threads.append(thread2)\n",
    " \n",
    "# 等待所有线程完成\n",
    "for t in threads:\n",
    "    t.join()\n",
    "    \n",
    "print (\"Exiting Main Thread\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## 7.2.3 多线程爬虫"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting Thread-1Starting Thread-2\n",
      "\n",
      "Starting Thread-3Starting Thread-4\n",
      "\n",
      "Thread-1 200 http://www.baidu.com\n",
      "Thread-2 200 http://www.dell.com\n",
      "Thread-1 200 http://www.qq.com\n",
      "Thread-4 200 http://www.wowenda.com\n",
      "Thread-1 200 Thread-2http://www.naver.com \n",
      "200 http://www.dict.cn\n",
      "Thread-1 200 http://www.taobao.com\n",
      "Thread-3 200 http://www.unrealengine.com\n",
      "Thread-4 200 http://www.coursera.org\n",
      "Thread-1 200 http://www.reddit.com\n",
      "Thread-1 200 http://www.sohu.com\n",
      "Thread-4 200 http://www.fangdr.com\n",
      "Thread-1 200 http://www.tmall.com\n",
      "Thread-4 200 http://www.cps.com.cn\n",
      "Thread-3 200 http://www.gao7.com\n",
      "Thread-4 200 http://www.kmf.com\n",
      "Thread-1 200 http://www.sina.com.cn\n",
      "Thread-4 200 http://www.cri.cn\n",
      "Thread-3 200 http://www.leju.com\n",
      "Thread-1 200 http://www.daum.net\n",
      "Thread-1 200 http://www.jd.com\n",
      "Thread-4 200 http://www.lmjx.net\n",
      "Thread-3 200 http://www.home77.com\n",
      "Thread-3 200 http://www.qunar.com\n",
      "Thread-2 200 http://www.yinyuetai.com\n",
      "Thread-3 200 http://www.xdowns.com\n",
      "Thread-1 200 http://www.360.cn\n",
      "Thread-2 200 http://www.aizhan.com\n",
      "Thread-2 200 http://www.gome.com.cn\n",
      "Thread-3 200 http://www.oa.com\n",
      "Thread-1 200 http://www.weibo.com\n",
      "Thread-3 200 http://www.sgcn.com\n",
      "Thread-2 200 http://www.meishichina.com\n",
      "Thread-1 200 http://www.aliexpress.com\n",
      "Thread-1 200 http://www.linkedin.com\n",
      "Thread-3 200 http://www.szjy188.com\n",
      "Thread-2 200 http://www.51hejia.com\n",
      "Thread-3 200 http://www.tuniu.com\n",
      "Thread-2 200 http://www.ule.com\n",
      "Thread-2 200 http://www.ea3w.com\n",
      "Thread-2 200 http://www.saraba1st.com\n",
      "Thread-3 200 http://www.135editor.com\n",
      "Thread-2 200 http://www.chsi.com.cn\n",
      "Error:  HTTPConnectionPool(host='www.f.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000078F1D30>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Error:  ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))\n",
      "Thread-2 200 http://www.vlive.tv\n",
      "Thread-2 200 http://www.sonhoo.com\n",
      "Thread-2 200 http://www.hongkongairlines.com\n",
      "Thread-3 200 http://www.jiyoujia.com\n",
      "Thread-2 200 http://www.jxnews.com.cn\n",
      "Thread-3 200 http://www.95516.com\n",
      "Thread-2 200 http://www.free.com.tw\n",
      "Thread-2 200 http://www.docin.com\n",
      "Thread-3 200 http://www.yiqifa.com\n",
      "Thread-2 200 http://www.liepin.com\n",
      "Thread-3 200 http://www.cocoachina.com\n",
      "Thread-2 200 http://www.chinaunix.net\n",
      "Thread-2 200 http://www.weibo.cn\n",
      "Thread-3 200 http://www.babyschool.com.cn\n",
      "Thread-2 200 http://www.ifanr.com\n",
      "Thread-3 200 http://www.iweihai.cn\n",
      "Thread-3 200 http://www.haowu.com\n",
      "Thread-3 200 http://www.hm.com\n",
      "Thread-3 200 http://www.wish.com\n",
      "Error:  HTTPConnectionPool(host='www.lonshinetech.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000078C1438>: Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应，连接尝试失败。',))\n",
      "Thread-3 200 http://www.fitbit.com\n",
      "Thread-1 200 http://www.alipay.com\n",
      "Thread-1 200 http://www.hao123.com\n",
      "Thread-3 200 http://www.taojindi.com\n",
      "Thread-4 200 http://www.infoq.com\n",
      "Thread-4 200 http://www.gushiwen.org\n",
      "Thread-3 200 http://www.koolearn.com\n",
      "Thread-4 200 http://www.ecp888.com\n",
      "Thread-2 200 http://www.51auto.com\n",
      "Thread-4 200 http://www.tongtool.com\n",
      "Thread-2 200 http://www.ebrun.com\n",
      "Thread-3 520 http://www.xabbs.com\n",
      "Thread-4 200 http://www.dajie.com\n",
      "Thread-1 200 http://www.csdn.net\n",
      "Thread-3 520 http://www.020.com\n",
      "Thread-2 200 http://www.10010.com\n",
      "Thread-4 200 http://www.co188.com\n",
      "Thread-1 200 http://www.youth.cn\n",
      "Thread-3 200 http://www.qiniu.com\n",
      "Thread-2 200 http://www.hebei.com.cn\n",
      "Thread-2 200 http://www.tgbus.com\n",
      "Thread-3 200 http://www.25pp.com\n",
      "Thread-2 200 http://www.mtime.com\n",
      "Thread-3 200 http://www.nga.cn\n",
      "Thread-2 200 http://www.vip.com\n",
      "Thread-2 200 http://www.kdslife.com\n",
      "Thread-1 200 http://www.live.com\n",
      "Thread-3 200 http://www.educity.cn\n",
      "Error:  HTTPConnectionPool(host='www.www.gov.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x0000000007910668>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-3 200 http://www.zealer.com\n",
      "Thread-1 200 http://www.tianya.cn\n",
      "Thread-2 200 http://www.cncn.org.cn\n",
      "Thread-3 200 http://www.xdowns.com\n",
      "Thread-1 200 http://www.microsoftonline.com\n",
      "Thread-2 200 http://www.techcrunch.com\n",
      "Thread-1 200 http://www.office.com\n",
      "Thread-4 200 http://www.fumanhua.net\n",
      "Thread-2 200 http://www.zbj.com\n",
      "Thread-2 200 http://www.ip138.com\n",
      "Thread-2 404 http://www.cyol.com\n",
      "Thread-3 200 http://www.liqu.com\n",
      "Thread-3 405 http://www.qichacha.com\n",
      "Thread-2 200 http://www.pc6.com\n",
      "Thread-4 200 http://www.maiche168.com\n",
      "Thread-2 200 http://www.joox.com\n",
      "Thread-1 200 http://www.soso.com\n",
      "Thread-3 200 http://www.51credit.com\n",
      "Error:  HTTPConnectionPool(host='www.sankuai.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x0000000007910940>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-3 200 http://www.duomai.com\n",
      "Thread-2 200 http://www.178.com\n",
      "Thread-4 404 http://www.ucas.ac.cn\n",
      "Thread-4 200 http://www.lamabang.com\n",
      "Thread-1 200 http://www.so.com\n",
      "Thread-3 200 http://www.juooo.com\n",
      "Thread-1 200 http://www.gmw.cn\n",
      "Thread-1 200 http://www.china.com\n",
      "Thread-3 200 http://www.shanbay.com\n",
      "Thread-1 200 http://www.nate.com\n",
      "Thread-4 200 http://www.huajiao.com\n",
      "Thread-3 200 http://www.juooo.com\n",
      "Thread-2 200 http://www.lagou.com\n",
      "Thread-3 200 http://www.shanbay.com\n",
      "Thread-1 200 http://www.huaban.com\n",
      "Thread-1 200 http://www.bing.com\n",
      "Thread-2 200 http://www.18183.com\n",
      "Thread-4 200 http://www.accorhotels.com\n",
      "Thread-3 200 http://www.meishij.net\n",
      "Thread-1 200 http://www.xinhuanet.com\n",
      "Thread-1 200 http://www.youku.com\n",
      "Thread-2 200 http://www.365jia.cn\n",
      "Thread-4 200 http://www.wendangku.net\n",
      "Thread-4 200 http://www.dragonparking.com\n",
      "Thread-2 200 http://www.autohome.com.cn\n",
      "Thread-4 200 http://www.6789.com\n",
      "Thread-4 200 http://www.xdf.cn\n",
      "Thread-2 200 http://www.battlenet.com.cn\n",
      "Thread-2 200 http://www.oracle.com\n",
      "Thread-2 200 http://www.miaopai.com\n",
      "Thread-2 200 http://www.sina.cn\n",
      "Thread-4 200 http://www.tucao.tv\n",
      "Thread-4 200 http://www.91yunxiao.com\n",
      "Thread-4 200 http://www.liebiao.com\n",
      "Thread-2 200 http://www.ch.com\n",
      "Thread-1 500 http://www.zhihu.com\n",
      "Thread-4 200 http://www.9lianmeng.com\n",
      "Thread-2 200 http://www.yxdown.com\n",
      "Thread-4 200 http://www.51240.com\n",
      "Thread-1 200 http://www.cctv.com\n",
      "Thread-1 200 http://www.airasia.com\n",
      "Thread-1 200 http://www.douyu.com\n",
      "Thread-4 200 http://www.zhiyoo.com\n",
      "Thread-4 200 http://www.silkair.com\n",
      "Thread-1 200 http://www.babytree.com\n",
      "Thread-1 200 http://www.apple.com\n",
      "Thread-4 200 http://www.313.cn\n",
      "Error:  HTTPConnectionPool(host='www.ssl-images-amazon.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x000000000791A748>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-1 200 http://www.sogou.com\n",
      "Thread-1 200 http://www.china.com.cn\n",
      "Thread-4 200 http://www.eepw.com.cn\n",
      "Error:  HTTPConnectionPool(host='www.gs307.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x000000000789F860>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-4 403 http://www.yindou.com\n",
      "Thread-2 200 http://www.etao.com\n",
      "Thread-1 200 http://www.yelp.com\n",
      "Thread-4 200 http://www.i1515.com\n",
      "Thread-1 200 http://www.ocbc.com\n",
      "Thread-2 200 http://www.vietnamairlines.com\n",
      "Thread-2 200 http://www.iyiou.com\n",
      "Thread-4 200 http://www.imiker.com\n",
      "Thread-1 200 http://www.microsoft.com\n",
      "Thread-1 200 http://www.mama.cn\n",
      "Thread-4 200 http://www.lvmama.com\n",
      "Thread-4 200 http://www.louisvuitton.com\n",
      "Thread-1 200 http://www.bitauto.com\n",
      "Thread-4 200 http://www.nowgoal.com\n",
      "Thread-4 200 http://www.makeding.com\n",
      "Thread-1 200 http://www.bankofamerica.com\n",
      "Thread-1 200 http://www.1688.com\n",
      "Thread-4 200 http://www.xz7.com\n",
      "Thread-4 200 http://www.guitarchina.com\n",
      "Thread-1 200 http://www.stackoverflow.com\n",
      "Thread-4 200 http://www.wto168.net\n",
      "Thread-4 200 http://www.abchina.com\n",
      "Thread-4 200 http://www.fzdm.com\n",
      "Thread-1 200 http://www.163.com\n",
      "Thread-4 200 http://www.ichacha.net\n",
      "Thread-4 200 http://www.1024sj.com\n",
      "Thread-4 200 http://www.ef43.com.cn\n",
      "Thread-4 200 http://www.newrank.cn\n",
      "Thread-4 200 http://www.ceair.com\n",
      "Thread-4 200 http://www.zimuku.net\n",
      "Thread-4 200 http://www.ppkoo.com\n",
      "Thread-1 200 http://www.39.net\n",
      "Thread-4 200 http://www.jc35.com\n",
      "Thread-1 200 http://www.cnblogs.com\n",
      "Thread-1 200 http://www.bilibili.com\n",
      "Thread-2 200 http://www.shop.com\n",
      "Thread-1 200 http://www.interpark.com\n",
      "Thread-1 200 http://www.huanqiu.com\n",
      "Thread-2 200 http://www.588ku.com\n",
      "Thread-2 200 http://www.le.com\n",
      "Thread-1 200 http://www.cnzz.com\n",
      "Thread-2 200 http://www.sina.com\n",
      "Thread-1 200 http://www.chinadaily.com.cn\n",
      "Error:  ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))\n",
      "Thread-4 200 http://www.dnspod.cn\n",
      "Thread-2 200 http://www.jstv.com\n",
      "Thread-2 200 http://www.ceconline.com\n",
      "Thread-1 200 http://www.msn.com\n",
      "Thread-4 200 http://www.hsw.cn\n",
      "Thread-2 200 http://www.koreanair.com\n",
      "Thread-1 200 http://www.k618.cn\n",
      "Thread-2 200 http://www.skype.com\n",
      "Thread-4 200 http://www.caixin.com\n",
      "Thread-1 200 http://www.yesky.com\n",
      "Thread-2 200 http://www.ih5.cn\n",
      "Thread-1 200 http://www.caijing.com.cn\n",
      "Thread-2 200 http://www.ems.com.cn\n",
      "Thread-2 520 http://www.efu.com.cn\n",
      "Thread-4 200 http://www.manmanbuy.com\n",
      "Thread-4 200 http://www.23us.com\n",
      "Thread-1 200 http://www.emirates.com\n",
      "Thread-4 200 http://www.asus.com\n",
      "Thread-2 200 http://www.pcbaby.com.cn\n",
      "Thread-4 200 http://www.zoosnet.net\n",
      "Thread-1 200 http://www.amazon.cn\n",
      "Thread-2 200 http://www.shimo.im\n",
      "Thread-4 200 http://www.xp510.com\n",
      "Thread-1 200 http://www.aliyun.com\n",
      "Thread-4 200 http://www.vgtime.com\n",
      "Thread-2 200 http://www.macaolife.com\n",
      "Thread-1 200 http://www.eastday.com\n",
      "Thread-4 200 http://www.qiushibaike.com\n",
      "Thread-1 200 http://www.youdao.com\n",
      "Thread-2 200 http://www.xiu.com\n",
      "Thread-2 200 http://www.eastmoney.com\n",
      "Thread-1 200 http://www.oeeee.com\n",
      "Thread-1 404 http://www.ci123.com\n",
      "Thread-4 200 http://www.jinshuju.net\n",
      "Thread-4 200 http://www.115.com\n",
      "Thread-2 200 http://www.xiumi.us\n",
      "Thread-4 200 http://www.3367.com\n",
      "Thread-4 200 http://www.fanli.com\n",
      "Thread-4 200 http://www.newcger.com\n",
      "Thread-2 200 http://www.yhd.com\n",
      "Thread-4 200 http://www.kepu.net.cn\n",
      "Thread-2 200 http://www.jiemian.com\n",
      "Thread-1 200 http://www.baike.com\n",
      "Thread-3 504 http://www.th7.cn\n",
      "Thread-1 200 http://www.adobe.com\n",
      "Thread-2 200 http://www.daikuan.com\n",
      "Thread-2 200 http://www.ximalaya.com\n",
      "Thread-1 200 http://www.rednet.cn\n",
      "Thread-2 200 http://www.marriott.com\n",
      "Thread-1 200 http://www.iqiyi.com\n",
      "Thread-4 200 http://www.findlaw.cn\n",
      "Thread-3 200 http://www.jia400.com\n",
      "Thread-4 200 http://www.jiumei.com\n",
      "Thread-2 200 http://www.d1ev.com\n",
      "Thread-3 200 http://www.cas.cn\n",
      "Thread-3 200 http://www.wenwuchina.com\n",
      "Thread-4 520 http://www.gkstk.com\n",
      "Thread-3 200 http://www.189.cn\n",
      "Thread-4 200 http://www.ihg.com\n",
      "Thread-2 200 http://www.xitek.com\n",
      "Thread-4 200 http://www.blizzard.com\n",
      "Thread-1 200 http://www.wemakeprice.com\n",
      "Thread-2 403 http://www.chuansong.me\n",
      "Thread-4 200 http://www.lenovo.com\n",
      "Thread-2 200 http://www.alitrip.com\n",
      "Thread-1 200 http://www.douban.com\n",
      "Thread-2 200 http://www.xiaomi.cn\n",
      "Thread-1 200 http://www.familydoctor.com.cnError: \n",
      " ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))\n",
      "Thread-2 200 http://www.91jm.com\n",
      "Thread-1 200 http://www.agoda.com\n",
      "Thread-3 200 http://www.liuxue86.com\n",
      "Thread-2 200 http://www.2cto.com\n",
      "Thread-2 403 http://www.qoo10.com\n",
      "Thread-3 200 http://www.klook.com\n",
      "Thread-2 200 http://www.centadata.com\n",
      "Thread-1 200 http://www.jrj.com.cn\n",
      "Thread-2 405 http://www.lufthansa.com\n",
      "Thread-2 200 http://www.techweb.com.cn\n",
      "Thread-2 200 http://www.kugou.com\n",
      "Thread-1 200 http://www.read01.com\n",
      "Thread-2 200 http://www.80018.cn\n",
      "Thread-1 200 http://www.17ok.com\n",
      "Thread-2 200 http://www.tmtpost.com\n",
      "Thread-1 200 http://www.chinaz.com\n",
      "Thread-1 200 http://www.youboy.com\n",
      "Thread-1 200 http://www.tesco.com\n",
      "Thread-1 200 http://www.alibaba.com\n",
      "Thread-1 200 http://www.gearbest.com\n",
      "Thread-1 200 http://www.51sole.com\n",
      "Thread-1 200 http://www.dbs.com\n",
      "Thread-2 200 http://www.house365.com\n",
      "Thread-2 200 http://www.hp.com\n",
      "Thread-2 200 http://www.unity3d.com\n",
      "Thread-3 200 http://www.shfft.com\n",
      "Thread-4 200 http://www.longau.com\n",
      "Thread-3 200 http://www.8264.com\n",
      "Thread-2 200 http://www.zoom.us\n",
      "Thread-2 200 http://www.kafan.cn\n",
      "Thread-2 200 http://www.liansuo.com\n",
      "Thread-3 200 http://www.china.cn\n",
      "Thread-3 200 http://www.zhifang.com\n",
      "Thread-2 200 http://www.netease.comThread-3\n",
      " 200 http://www.made-in-china.com\n",
      "Thread-3 200 http://www.rabbitpre.com\n",
      "Thread-2 200 http://www.10jqka.com.cn\n",
      "Thread-3 200 http://www.sap.com\n",
      "Thread-3 200 http://www.macx.cn\n",
      "Thread-3 200 http://www.everychina.com\n",
      "Thread-3 200 http://www.9game.cn\n",
      "Thread-2 200 http://www.xiazaiba.com\n",
      "Thread-2 200 http://www.fang.com\n",
      "Thread-3 200 http://www.ca800.com\n",
      "Thread-2 200 http://www.smartisan.com\n",
      "Thread-3 200 http://www.dgtle.com\n",
      "Thread-2 200 http://www.photofans.cn\n",
      "Thread-2 200 http://www.ooopic.com\n",
      "Thread-2 200 http://www.zybang.com\n",
      "Thread-2 200 http://www.gw-ec.com\n",
      "Thread-3 200 http://www.cloudscar.com\n",
      "Thread-2 200 http://www.wed114.cn\n",
      "Thread-2 200 http://www.huomao.com\n",
      "Thread-3 200 http://www.bdhome.cn\n",
      "Thread-2 403 http://www.ithome.com\n",
      "Thread-3 200 http://www.news18a.com\n",
      "Thread-3 200 http://www.shilladfs.com\n",
      "Thread-3 400 http://www.net-a-porter.com\n",
      "Thread-3 200 http://www.zealer.com\n",
      "Thread-3 200 http://www.discoverhongkong.com\n",
      "Thread-3 200 http://www.80s.tw\n",
      "Thread-3 200 http://www.9ku.com\n",
      "Thread-3 200 http://www.33lc.com\n",
      "Thread-4 200 http://www.seedit.com\n",
      "Thread-3 200 http://www.thepaper.cn\n",
      "Thread-4 200 http://www.ofweek.com\n",
      "Thread-3 200 http://www.scswl.cn\n",
      "Thread-3 403 http://www.officedepot.com\n",
      "Thread-4 200 http://www.61baobao.com\n",
      "Thread-3 200 http://www.fx678.com\n",
      "Thread-3 200 http://www.banma.com\n",
      "Thread-4 200 http://www.400.cn\n",
      "Error:  HTTPConnectionPool(host='www.ccb.com.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000078B87B8>: Failed to establish a new connection: [WinError 10061] 由于目标计算机积极拒绝，无法连接。',))\n",
      "Thread-2 200 http://www.chinanews.com\n",
      "Thread-4 200 http://www.wines-info.com\n",
      "Thread-3 200 http://www.eee114.com\n",
      "Thread-2 200 http://www.doc88.com\n",
      "Thread-4 200 http://www.innisfree.com\n",
      "Thread-4 200 http://www.weather.com.cn\n",
      "Thread-2 200 http://www.sanguosha.com\n",
      "Thread-3 200 http://www.9384.com\n",
      "Thread-2 200 http://www.evaair.com\n",
      "Thread-2 200 http://www.icbc.com.cn\n",
      "Thread-2 200 http://www.youxidudu.com\n",
      "Thread-3 200 http://www.xuexila.com\n",
      "Thread-4 200 http://www.che168.com\n",
      "Thread-4 200 http://www.dilidili.wang\n",
      "Thread-3 200 http://www.9384.com\n",
      "Thread-4 200 http://www.7po.com\n",
      "Thread-2 200 http://www.verycd.com\n",
      "Thread-4 200 http://www.qiushibaike.com\n",
      "Thread-3 200 http://www.xuexila.com\n",
      "Thread-1 200 http://www.suning.com\n",
      "Thread-1Thread-3  200403  http://www.cheshen.cnhttp://www.oschina.net\n",
      "\n",
      "Thread-2 200 http://www.netcoc.com\n",
      "Thread-3 200 http://www.mr-world.com\n",
      "Thread-4 200 http://www.9r.cn\n",
      "Thread-1 200 http://www.voc.com.cn\n",
      "Thread-4 200 http://www.weather.com.cn\n",
      "Thread-3 200 http://www.fx112.com\n",
      "Thread-1 200 http://www.zol.com.cn\n",
      "Thread-1 200 http://www.asos.com\n",
      "Thread-3 200 http://www.97665.com\n",
      "Thread-4 200 http://www.107cine.com\n",
      "Thread-1 200 http://www.chinaso.com\n",
      "Thread-2 200 http://www.pepper.com\n",
      "Thread-1 200 http://www.jianshu.com\n",
      "Thread-2 200 http://www.dygang.com\n",
      "Thread-4 200 http://www.coolapk.com\n",
      "Thread-1 200 http://www.ifeng.com\n",
      "Thread-1 200 http://www.stockstar.com\n",
      "Thread-2 200 http://www.liaoxuefeng.com\n",
      "Thread-3 200 http://www.chinahr.com\n",
      "Thread-4 200 http://www.ixueshu.com\n",
      "Thread-2 200 http://www.flyasiana.com\n",
      "Thread-4 200 http://www.iplaysoft.com\n",
      "Thread-2 200 http://www.sciencenet.cn\n",
      "Thread-1 200 http://www.zhanqi.tv\n",
      "Thread-1 200 http://www.52pk.com\n",
      "Thread-2 200 http://www.feiyang.com\n",
      "Thread-4 200 http://www.blizzard.cn\n",
      "Thread-1 200 http://www.whatsbuying.com\n",
      "Thread-4 200 http://www.dangbei.com\n",
      "Thread-1 200 http://www.cqnews.net\n",
      "Thread-4 200 http://www.hellorf.com\n",
      "Thread-3 200 http://www.acs.org\n",
      "Thread-2 200 http://www.800hr.com\n",
      "Thread-2 200 http://www.iconfont.cn\n",
      "Thread-1 200 http://www.gongchang.com\n",
      "Thread-4 200 http://www.21food.cn\n",
      "Thread-3 200 http://www.mikecrm.com\n",
      "Thread-2 200 http://www.youzan.com\n",
      "Thread-3 200 http://www.checheng.com\n",
      "Thread-4 200 http://www.libaclub.com\n",
      "Thread-1 200 http://www.godaddy.com\n",
      "Thread-1 200 http://www.godaddy.com\n",
      "Thread-4 200 http://www.outofmemory.cn\n",
      "Thread-2 200 http://www.360kan.com\n",
      "Thread-1 200 http://www.wtoip.com\n",
      "Thread-1 200 http://www.segmentfault.com\n",
      "Thread-2 200 http://www.chinabyte.com\n",
      "Thread-4 200 http://www.ele.me\n",
      "Thread-2 200 http://www.samsung.com\n",
      "Thread-1 200 http://www.evernote.com\n",
      "Thread-4 200 http://www.shihuo.cn\n",
      "Thread-4 200 http://www.zmz2017.com\n",
      "Thread-1 200 http://www.dianping.com\n",
      "Thread-2 200 http://www.zxart.cn\n",
      "Thread-2 200 http://www.gucheng.com\n",
      "Thread-4 200 http://www.zybuluo.com\n",
      "Thread-4 200 http://www.66ys.tv\n",
      "Thread-2 200 http://www.bootcss.com\n",
      "Thread-2 200 http://www.cankaoxiaoxi.com\n",
      "Thread-4 200 http://www.sczw.com\n",
      "Thread-1 200 http://www.qingdaonews.com\n",
      "Thread-2 200 http://www.58pic.com\n",
      "Thread-1 200 http://www.guancha.cn\n",
      "Thread-4 200 http://www.xtx6.com\n",
      "Thread-2 200 http://www.81.cn\n",
      "Thread-1 200 http://www.standardchartered.com\n",
      "Thread-4 200 http://www.tutorabc.com\n",
      "Thread-1 200 http://www.singaporeair.com\n",
      "Thread-2 200 http://www.csair.com\n",
      "Thread-1 200 http://www.toutiao.com\n",
      "Thread-1 200 http://www.jiameng.com\n",
      "Thread-1 200 http://www.dm5.com\n",
      "Thread-4 200 http://www.zhipin.com\n",
      "Thread-2 410 http://www.chiphell.com\n",
      "Thread-2 200 http://www.antpedia.com\n",
      "Thread-1 200 http://www.w3school.com.cn\n",
      "Thread-2 200 http://www.xiachufang.com\n",
      "Thread-1 200 http://www.zhaopin.com\n",
      "Thread-2 200 http://www.winshang.com\n",
      "Thread-1 200 http://www.99.com\n",
      "Thread-1 200 http://www.mi.com\n",
      "Thread-2 200 http://www.fzg360.com\n",
      "Thread-1 200 http://www.b2b.cn\n",
      "Thread-2 200 http://www.chaduo.com\n",
      "Thread-1 200 http://www.cathaypacific.com\n",
      "Thread-2 200 http://www.12306.cn\n",
      "Thread-2 200 http://www.morningpost.com.cn\n",
      "Thread-1 200 http://www.southcn.com\n",
      "Thread-2 200 http://www.soku.com\n",
      "Thread-1 200 http://www.battle.net\n",
      "Thread-2 200 http://www.sspai.com\n",
      "Thread-1 200 http://www.ups.com\n",
      "Thread-1 200 http://www.jb51.net\n",
      "Thread-2 200 http://www.yoox.com\n",
      "Thread-1 200 http://www.comcast.net\n",
      "Thread-2 500 http://www.huxiu.com\n",
      "Thread-2 200 http://www.nyu.edu\n",
      "Thread-2 200 http://www.jiwu.com\n",
      "Thread-2 200 http://www.u17.com\n",
      "Thread-2 200 http://www.jiayuan.com\n",
      "Thread-4 200 http://www.cgdc.com.cn\n",
      "Thread-4 200 http://www.61learn.com\n",
      "Thread-4 200 http://www.sm.cn\n",
      "Thread-2 200 http://www.yy.com\n",
      "Thread-2 200 http://www.duowan.com\n",
      "Thread-4 200 http://www.571xz.com\n",
      "Thread-4 461 http://www.sobt5.org\n",
      "Thread-4 200 http://www.starwoodhotels.com\n",
      "Thread-4 200 http://www.qqtn.com\n",
      "Thread-2 200 http://www.mbalib.com\n",
      "Thread-4 200 http://www.sgamer.com\n",
      "Thread-2 200 http://www.wanfangdata.com.cn\n",
      "Thread-4 403 http://www.120ask.com\n",
      "Thread-1 502 http://www.alicdn.com\n",
      "Thread-4 200 http://www.appinn.com\n",
      "Thread-2 200 http://www.ibuying.com\n",
      "Thread-1 200 http://www.v2ex.com\n",
      "Thread-4 200 http://www.qianzhan.com\n",
      "Thread-1 200 http://www.firefoxchina.cn\n",
      "Thread-2 200 http://www.chouti.com\n",
      "Thread-4 200 http://www.888pic.com\n",
      "Thread-1 200 http://www.360doc.com\n",
      "Thread-1 200 http://www.xunlei.com\n",
      "Thread-2 200 http://www.71.net\n",
      "Thread-4 200 http://www.tianyancha.com\n",
      "Thread-4 200 http://www.k73.com\n",
      "Thread-2 200 http://www.hrloo.com\n",
      "Thread-2 200 http://www.meizu.com\n",
      "Thread-4 200 http://www.yiibai.com\n",
      "Thread-1 200 http://www.sharepoint.com\n",
      "Thread-2 200 http://www.miercn.com\n",
      "Thread-4 200 http://www.downxia.com\n",
      "Thread-2 200 http://www.fengniao.com\n",
      "Thread-4 200 http://www.managershare.com\n",
      "Thread-4 200 http://www.downcc.com\n",
      "Thread-2 200 http://www.fangdd.com\n",
      "Thread-2 200 http://www.htc.com\n",
      "Thread-4 200 http://www.biquge.tw\n",
      "Thread-2 200 http://www.jdzj.com\n",
      "Thread-2 200 http://www.pcauto.com.cn\n",
      "Thread-2 200 http://www.kaola.com\n",
      "Thread-2 200 http://www.kuaidi100.com\n",
      "Thread-4 200 http://www.fgowiki.com\n",
      "Thread-4 403 http://www.p2peye.com\n",
      "Thread-4 200Thread-1  http://www.haosou.com200\n",
      " http://www.scol.com.cn\n",
      "Thread-2 200 http://www.yougov.com\n",
      "Thread-2 200 http://www.ku6.com\n",
      "Thread-1 200 http://www.admaimai.com\n",
      "Thread-3 504 http://www.appgame.com\n",
      "Thread-4 200 http://www.yimu100.com\n",
      "Thread-4 200 http://www.fox.com\n",
      "Thread-2 200 http://www.sanwen8.cn\n",
      "Thread-1 200 http://www.v1.cn\n",
      "Thread-4 400 http://www.mrporter.com\n",
      "Thread-4 503 http://www.genshuixue.com\n",
      "Thread-1 200 http://www.51cto.com\n",
      "Thread-2 200 http://www.yiwugou.com\n",
      "Thread-2 200 http://www.lottedfs.com\n",
      "Thread-2 200 http://www.cisco.com\n",
      "Thread-3 200 http://www.linkhaitao.com\n",
      "Thread-1 200 http://www.jqw.com\n",
      "Thread-4 200 http://www.jisutiyu.com\n",
      "Thread-1 200 http://www.bzw315.com\n",
      "Thread-1 200 http://www.126.com\n",
      "Thread-4 200 http://www.topfo.com\n",
      "Thread-4 200 http://www.right.com.cn\n",
      "Thread-2 200 http://www.wallstreetcn.com\n",
      "Thread-4 200 http://www.5ewin.com\n",
      "Thread-4 503 http://www.dongnanshan.com\n",
      "Thread-2 200 http://www.gamedog.cn\n",
      "Thread-2 200 http://www.tencent.com\n",
      "Thread-4 200 http://www.jizhangla.com\n",
      "Thread-3 200 http://www.meipai.com\n",
      "Thread-3 403 http://www.linuxidc.com\n",
      "Thread-2 200 http://www.tvhome.com\n",
      "Thread-2 200 http://www.xbox.com\n",
      "Thread-4 200 http://www.laawoo.com\n",
      "Thread-2 200 http://www.cr173.com\n",
      "Thread-3 200 http://www.fliggy.com\n",
      "Thread-3 200 http://www.amap.com\n",
      "Thread-2 200 http://www.onlinedown.net\n",
      "Thread-3 200 http://www.4px.com\n",
      "Thread-2 200 http://www.ebay.com.hk\n",
      "Thread-4 200 http://www.3618med.com\n",
      "Error:  HTTPConnectionPool(host='www.qpic.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000078D7BE0>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-4 200 http://www.ahgame.com\n",
      "Thread-3 200 http://www.modao.cc\n",
      "Thread-4 200 http://www.mamicode.com\n",
      "Thread-2 200 http://www.searchs.cn\n",
      "Thread-2 200 http://www.17track.net\n",
      "Thread-4 200 http://www.wugu.com.cn\n",
      "Thread-4 200 http://www.115.com\n",
      "Thread-2 200 http://www.hyundai.com\n",
      "Thread-4 503 http://www.genshuixue.com\n",
      "Thread-2 200 http://www.baixing.com\n",
      "Thread-4 200 http://www.57mh.com\n",
      "Thread-3 200 http://www.dianxiaomi.com\n",
      "Thread-3 200 http://www.56.com\n",
      "Thread-3 200 http://www.java.com\n",
      "Thread-4 200 http://www.oiegg.com\n",
      "Thread-3 200 http://www.hdpfans.com\n",
      "Thread-3 200 http://www.thinkphp.cn\n",
      "Thread-4 200 http://www.21csp.com.cn\n",
      "Thread-4 200 http://www.kekenet.com\n",
      "Thread-3 200 http://www.2345.com\n",
      "Thread-4 200 http://www.c5game.com\n",
      "Thread-3 200 http://www.baoku.com\n",
      "Thread-3 200 http://www.tiancity.com\n",
      "Thread-1 200 http://www.beanfun.com\n",
      "Thread-4 200 http://www.juejin.im\n",
      "Thread-4 200 http://www.baofeng.com\n",
      "Thread-4 200 http://www.kuwo.cn\n",
      "Thread-4 200 http://www.6.cn\n",
      "Thread-1 200 http://www.chooseauto.com.cn\n",
      "Thread-1 200 http://www.renren.com\n",
      "Thread-1 200 http://www.taleo.net\n",
      "Thread-1 200 http://www.51.la\n",
      "Thread-1 200 http://www.zcool.com.cn\n",
      "Thread-1 200 http://www.4399.com\n",
      "Thread-1 200 http://www.duba.com\n",
      "Thread-1 200 http://www.globaltimes.cn\n",
      "Thread-4 200 http://www.chayu.com\n",
      "Thread-4 200 http://www.sanwen.net\n",
      "Thread-1 200 http://www.ycwb.com\n",
      "Thread-2 200 http://www.258.com\n",
      "Thread-1 200 http://www.sfacg.com\n",
      "Thread-1 200 http://www.hotelscombined.com\n",
      "Thread-4 200 http://www.962.net\n",
      "Thread-2 200 http://www.cn2che.com\n",
      "Thread-1 200 http://www.mydrivers.com\n",
      "Thread-4 200 http://www.etest.net.cn\n",
      "Thread-1 200 http://www.taoche.com\n",
      "Thread-4 200 http://www.innisfree.com\n",
      "Thread-2 200 http://www.pudn.com\n",
      "Thread-1 200 http://www.runoob.com\n",
      "Thread-4 200 http://www.dragonair.com\n",
      "Thread-2 200 http://www.dv37.com\n",
      "Thread-4 200 http://www.vjshi.com\n",
      "Thread-2 200 http://www.dv37.com\n",
      "Thread-4 200 http://www.lawtime.cn\n",
      "Thread-1 200 http://www.tlscontact.com\n",
      "Thread-2 200 http://www.uisdc.com\n",
      "Thread-1 200 http://www.nba.com\n",
      "Thread-1 200 http://www.gamebase.com.tw\n",
      "Thread-4 200 http://www.sccnn.com\n",
      "Thread-2 200 http://www.sojump.com\n",
      "Thread-4 200 http://www.qqbaobao.com\n",
      "Thread-1 200 http://www.zhibo8.cc\n",
      "Thread-4 200 http://www.dragonair.com\n",
      "Thread-1 403 http://www.hexun.com\n",
      "Thread-1 400 http://www.xiami.com\n",
      "Thread-2 200 http://www.d1net.com\n",
      "Thread-1 456 http://www.finnair.com\n",
      "Thread-4 200 http://www.vjshi.com\n",
      "Thread-1 200 http://www.feng.com\n",
      "Error:  HTTPConnectionPool(host='www.bcsh.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x000000000659DD68>: Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应，连接尝试失败。',))\n",
      "Thread-4 200 http://www.lawtime.cn\n",
      "Thread-2 200 http://www.ganji.com\n",
      "Thread-4 200 http://www.sccnn.com\n",
      "Thread-1 200 http://www.cdstm.cn\n",
      "Thread-2 200 http://www.jobbole.com\n",
      "Thread-4 200 http://www.qqbaobao.com\n",
      "Thread-1 200 http://www.uniqlo.com\n",
      "Thread-3 200 http://www.bozhong.com\n",
      "Thread-2 200 http://www.pearsoncmg.com\n",
      "Thread-1 200 http://www.iciba.com\n",
      "Thread-2 503 http://www.kongfz.com\n",
      "Thread-1 200 http://www.qudong.com\n",
      "Thread-4 200 http://www.chinaswitch.com\n",
      "Thread-1 200 http://www.panda.tv\n",
      "Thread-3 200 http://www.zhiding.cn\n",
      "Thread-3 200 http://www.longzhu.com\n",
      "Thread-1 200 http://www.cnbeta.com\n",
      "Thread-1 200 http://www.nipic.com\n",
      "Thread-4 200 http://www.5118.com\n",
      "Thread-4Thread-1 200  200http://www.cntv.cn \n",
      "http://www.sznews.com\n",
      "Thread-3 200 http://www.xjtour.com\n",
      "Thread-1 200 http://www.huawei.com\n",
      "Thread-1 200 http://www.tuicool.com\n",
      "Thread-1 200 http://www.baimao.com\n",
      "Thread-1 200 http://www.umeng.com\n",
      "Thread-4 200 http://www.knowsky.com\n",
      "Thread-4 416 http://www.skyscanner.com\n",
      "Thread-1 200 http://www.ccidnet.com\n",
      "Thread-4 200 http://www.wrz.com\n",
      "Thread-4 200 http://www.wasu.cn\n",
      "Thread-1 200 http://www.klm.com\n",
      "Thread-4 200 http://www.mojifen.com\n",
      "Thread-3 200 http://www.kancloud.cn\n",
      "Thread-4 200 http://www.nvidia.com\n",
      "Thread-3 200 http://www.open-open.com\n",
      "Thread-3 200 http://www.itpub.net\n",
      "Thread-4 200 http://www.oceanpark.com.hk\n",
      "Thread-3 200 http://www.elong.com\n",
      "Thread-4 200 http://www.pcbeta.com\n",
      "Thread-3 200 http://www.pchome.net\n",
      "Thread-4 200 http://www.psnine.com\n",
      "Thread-1 200 http://www.qcloud.com\n",
      "Thread-3 200 http://www.pps.tv\n",
      "Thread-4 403 http://www.228.com.cn\n",
      "Thread-4 200 http://www.zhuixinfan.com\n",
      "Thread-1 200 http://www.hupu.com\n",
      "Thread-3 403 http://www.qinqinbaby.com\n",
      "Thread-1 200 http://www.ikanman.com\n",
      "Thread-1 200 http://www.3dmgame.com\n",
      "Thread-4 200 http://www.okcoin.cn\n",
      "Thread-3 200 http://www.chuandong.com\n",
      "Thread-4 200 http://www.huya.com\n",
      "Thread-1 200 http://www.icolor.com.cn\n",
      "Thread-4 200 http://www.1ppt.com\n",
      "Thread-3 200 http://www.coding.net\n",
      "Thread-3 200 http://www.yidianzixun.com\n",
      "Thread-1 200 http://www.360.com\n",
      "Thread-4 200 http://www.fyber.com\n",
      "Thread-4 200 http://www.72byte.com\n",
      "Thread-4 200 http://www.cpic.com.cn\n",
      "Thread-3 200 http://www.51nb.com\n",
      "Thread-3 200 http://www.dhgate.com\n",
      "Thread-1 200 http://www.36kr.com\n",
      "Thread-4 200 http://www.wlmq.com\n",
      "Thread-1 200 http://www.miui.com\n",
      "Thread-4 200 http://www.lusongsong.com\n",
      "Thread-1 200 http://www.boc.cn\n",
      "Thread-1 200 http://www.gamersky.com\n",
      "Error:  HTTPConnectionPool(host='www.365jilin.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x0000000007947A20>: Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应，连接尝试失败。',))\n",
      "Thread-2 200 http://www.strawberrynet.com\n",
      "Thread-1 200 http://www.joyme.com\n",
      "Thread-4 200 http://www.fanjian.net\n",
      "Thread-2 200 http://www.11467.com\n",
      "Thread-1 200 http://www.17173.com\n",
      "Thread-4 200 http://www.hopetrip.com.hk\n",
      "Thread-1 200 http://www.uc.cn\n",
      "Thread-1 200 http://www.alimama.com\n",
      "Thread-4 200 http://www.hnjy.com.cn\n",
      "Thread-1 200 http://www.oasgames.com\n",
      "Thread-1 200 http://www.focus.cn\n",
      "Thread-1 200 http://www.cnr.cn\n",
      "Thread-4 200 http://www.8kana.com\n",
      "Thread-1 200 http://www.miomio.tv\n",
      "Thread-1 200 http://www.jjwxc.net\n",
      "Thread-4 200 http://www.8d.cc\n",
      "Thread-4 200 http://www.linux.cn\n",
      "Thread-1 200 http://www.5dcar.com\n",
      "Thread-1 403 http://www.hjenglish.com\n",
      "Thread-4 200 http://www.enterprise.com\n",
      "Thread-4 200 http://www.iqing.in\n",
      "Thread-1 200 http://www.dangdang.com\n",
      "Thread-4 200 http://www.sg560.com\n",
      "Thread-4 200 http://www.mnw.cn\n",
      "Thread-4 200 http://www.trendmicro.com\n",
      "Thread-1 200 http://www.springer.com\n",
      "Thread-1 200 http://www.to8to.com\n",
      "Thread-4 200 http://www.sipo.gov.cn\n",
      "Thread-1 200 http://www.xiaomi.com\n",
      "Thread-4 200 http://www.a.com.cn\n",
      "Thread-1 200 http://www.ctrip.com\n",
      "Thread-4 200 http://www.hangame.com\n",
      "Thread-4 200 http://www.cngold.org\n",
      "Thread-1 200 http://www.delta.com\n",
      "Thread-4 200 http://www.95095.com\n",
      "Thread-1 404 http://www.anjuke.com\n",
      "Thread-1 200 http://www.cnki.net\n",
      "Thread-4 200 http://www.ishuo.cn\n",
      "Thread-4 200 http://www.tecenet.com\n",
      "Thread-4 200 http://www.jinti.com\n",
      "Thread-1 200 http://www.surveymonkey.com\n",
      "Thread-1 200 http://www.tower.im\n",
      "Error:  HTTPConnectionPool(host='www.baiducontent.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x000000000659D1D0>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-4 200 http://www.sobaidupan.com\n",
      "Thread-4 521 http://www.ichunqiu.com\n",
      "Thread-1 200 http://www.acfun.cn\n",
      "Thread-4 200 http://www.xilu.com\n",
      "Thread-1 200 http://www.people.com.cn\n",
      "Error:  HTTPConnectionPool(host='www.jobui.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x000000000791AA20>: Failed to establish a new connection: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应，连接尝试失败。',))\n",
      "Thread-1 200 http://www.jmw.com.cn\n",
      "Thread-2 200 http://www.hh010.com\n",
      "Thread-4 200 http://www.3987.com\n",
      "Thread-4 200 http://www.rr-sc.com\n",
      "Thread-4 200 http://www.99114.com\n",
      "Thread-2 200 http://www.teambition.com\n",
      "Thread-1 200 http://www.worktile.com\n",
      "Thread-4 200 http://www.haodou.com\n",
      "Thread-2 200 http://www.woshipm.com\n",
      "Thread-1 500 http://www.newsmth.net\n",
      "Thread-2 200 http://www.lge.com\n",
      "Thread-1 200 http://www.vmall.com\n",
      "Thread-1 200 http://www.07073.com\n",
      "Thread-2 200 http://www.kanxi.cc\n",
      "Thread-1 200 http://www.qyer.com\n",
      "Thread-4 200 http://www.wolfram.com\n",
      "Thread-1 403 http://www.hujiang.com\n",
      "Thread-1 200 http://www.cnnic.cn\n",
      "Thread-4 200 http://www.expreview.com\n",
      "Thread-4 520 http://www.myexception.cn\n",
      "Thread-2 200 http://www.leiphone.com\n",
      "Thread-1 200 http://www.meituan.com\n",
      "Thread-4 200 http://www.shixiseng.com\n",
      "Thread-1 200 http://www.yinxiang.com\n",
      "Thread-4 200 http://www.bjjs.gov.cn\n",
      "Thread-1 200 http://www.ngacn.cc\n",
      "Thread-2 200 http://www.d1com.com\n",
      "Thread-4 200 http://www.xxbiquge.com\n",
      "Thread-1 403 http://www.smzdm.com\n",
      "Thread-2 200 http://www.114so.cn\n",
      "Thread-4 200 http://www.lesports.com\n",
      "Thread-1 200 http://www.ccb.com\n",
      "Thread-1 200 http://www.ali213.net\n",
      "Thread-2 200 http://www.d1com.com\n",
      "Thread-2 200 http://www.114so.cn\n",
      "Thread-2 200 http://www.duomai.com\n",
      "Thread-1 200 http://www.alibaba-inc.com\n",
      "Thread-1 200 http://www.3158.cn\n",
      "Thread-1 200 http://www.vmall.com\n",
      "Thread-1 200 http://www.nike.com\n",
      "Thread-1 200 http://www.eqxiu.com\n",
      "Thread-1 200 http://www.jandan.net\n",
      "Thread-1 200 http://www.office365.com\n",
      "Thread-2 200 http://www.win007.com\n",
      "Thread-2 200 http://www.weidian.com\n",
      "Thread-4 200 http://www.hea.cn\n",
      "Thread-1 200 http://www.imooc.com\n",
      "Thread-1 200 http://www.ikea.com\n",
      "Thread-2 200 http://www.qiku.com\n",
      "Thread-1 200 http://www.united.com\n",
      "Thread-2 200 http://www.cli.im\n",
      "Thread-4 200 http://www.24home.com\n",
      "Thread-4 200 http://www.yeah.net\n",
      "Thread-2 200 http://www.flyertea.com\n",
      "Thread-1 200 http://www.ly.com\n",
      "Thread-2 200 http://www.lenovo.com.cn\n",
      "Thread-4 200 http://www.qcw.com\n",
      "Thread-1 200 http://www.epwk.com\n",
      "Thread-1 200 http://www.tudou.com\n",
      "Thread-4 200 http://www.shoes.net.cn\n",
      "Error:  ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))\n",
      "Thread-1 200 http://www.leagueoflegends.com\n",
      "Thread-2 200 http://www.aso100.com\n",
      "Error:  HTTPConnectionPool(host='www.bjhjyd.gov.cn', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x0000000007953C50>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-4 200 http://www.ecvv.com\n",
      "Thread-2 403 http://www.xueqiu.com\n",
      "Thread-4 200 http://www.fanlibang.com\n",
      "Thread-1 200 http://www.aa.com\n",
      "Thread-2 200 http://www.bp.com\n",
      "Thread-4 200 http://www.jxmall.com\n",
      "Thread-1 200 http://www.garena.com\n",
      "Thread-1 200 http://www.mafengwo.cn\n",
      "Thread-4 200 http://www.xcar.com.cn\n",
      "Thread-1 200 http://www.ifensi.com\n",
      "Thread-2 200 http://www.dingtalk.com\n",
      "Thread-1 200 http://www.pptv.com\n",
      "Thread-4 200 http://www.go108.com.cn\n",
      "Thread-2 200 http://www.processon.com\n",
      "Thread-1 200 http://www.fobshanghai.com\n",
      "Thread-4 200 http://www.divcss5.com\n",
      "Thread-4 200 http://www.sc.com\n",
      "Thread-1 200 http://www.asiamiles.com\n",
      "Thread-2 200 http://www.flyme.cn\n",
      "Thread-1 200 http://www.znds.com\n",
      "Thread-2 200 http://www.a9vg.com\n",
      "Thread-1 200 http://www.hc360.com\n",
      "Thread-4 200 http://www.watchstore.com.cn\n",
      "Thread-2 200 http://www.sinaimg.cn\n",
      "Thread-1 200 http://www.job853.com\n",
      "Thread-4 200 http://www.mexgroup.com\n",
      "Thread-2 403 http://www.saic.gov.cn\n",
      "Thread-4 200 http://www.xunyingwang.com\n",
      "Thread-2 200 http://www.mgtv.com\n",
      "Thread-4 200 http://www.chinagate.cn\n",
      "Thread-4 200 http://www.zdic.net\n",
      "Thread-1 200 http://www.sf-express.com\n",
      "Error:  HTTPConnectionPool(host='www.bdimg.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x0000000007977FD0>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Error:  list index out of range\n",
      "Exiting Thread-4\n",
      "Thread-2 200 http://www.nuomi.com\n",
      "Thread-1 200 http://www.lianjia.com\n",
      "Thread-2 200 http://www.tiexue.net\n",
      "Thread-2 200 http://www.vvvdj.com\n",
      "Thread-1 200 http://www.guokr.com\n",
      "Thread-2 403 http://www.tvmao.com\n",
      "Thread-1 200 http://www.cmbchina.com\n",
      "Thread-2 200 http://www.panduoduo.net\n",
      "Thread-2 200 http://www.wechat.com\n",
      "Thread-2 200 http://www.52pojie.cn\n",
      "Thread-2 200 http://www.miwifi.com\n",
      "Thread-2 200 http://www.iteye.com\n",
      "Thread-2 200 http://www.kanzhun.com\n",
      "Thread-2 200 http://www.mango.com\n",
      "Thread-2 200 http://www.cheaa.com\n",
      "Thread-2 200 http://www.13322.com\n",
      "Thread-2 200 http://www.jikexueyuan.com\n",
      "Error:  ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))\n",
      "Thread-2 200 http://www.mydigit.cn\n",
      "Thread-2 200 http://www.gusuwang.com\n",
      "Thread-2 200 http://www.pinggu.org\n",
      "Thread-2 200 http://www.lbldy.com\n",
      "Thread-2 200 http://www.sgcn.com\n",
      "Thread-2 200 http://www.misumi-ec.com\n",
      "Thread-2 200 http://www.lofter.com\n",
      "Exiting Thread-2\n",
      "Thread-1 504 http://www.modernweekly.com\n",
      "Thread-1 200 http://www.ynet.com\n",
      "Exiting Thread-1\n",
      "Error:  ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))\n",
      "Thread-3 200 http://www.6vhao.com\n",
      "Thread-3 200 http://www.5acbd.com\n",
      "Thread-3 200 http://www.atobo.com.cn\n",
      "Thread-3 200 http://www.kubo365.com\n",
      "Thread-3 200 http://www.111cn.net\n",
      "Thread-3 403 http://www.zhongmin.cn\n",
      "Thread-3 200 http://www.weiyangx.com\n",
      "Thread-3 200 http://www.juesheng.com\n",
      "Thread-3 200 http://www.uuu9.com\n",
      "Thread-3 200 http://www.siilu.com\n",
      "Thread-3 200 http://www.pconline.com.cn\n",
      "Thread-3 200 http://www.dji.com\n",
      "Thread-3 200 http://www.west.cn\n",
      "Thread-3 200 http://www.51zxw.net\n",
      "Thread-3 200 http://www.ctfile.com\n",
      "Thread-3 200 http://www.idianfa.com\n",
      "Thread-3 200 http://www.smm.cn\n",
      "Thread-3 200 http://www.shejis.com\n",
      "Thread-3 200 http://www.zhangyu.tv\n",
      "Thread-3 200 http://www.17zwd.com\n",
      "Thread-3 200 http://www.dhl.com\n",
      "Thread-3 200 http://www.shfft.com\n",
      "Thread-3 200 http://www.wanmei.com\n",
      "Thread-3 200 http://www.122.gov.cn\n",
      "Thread-3 200 http://www.51nb.com\n",
      "Thread-3 200 http://www.xici.net\n",
      "Thread-3 200 http://www.cnki.com.cn\n",
      "Thread-3 200 http://www.redocn.com\n",
      "Thread-3 200 http://www.qvc.com\n",
      "Thread-3 200 http://www.aipai.com\n",
      "Thread-3 200 http://www.dapenti.com\n",
      "Thread-3 200 http://www.3lian.com\n",
      "Thread-3 200 http://www.guidechem.com\n",
      "Thread-3 200 http://www.jiankang.com\n",
      "Thread-3 200 http://www.tgfcer.com\n",
      "Thread-3 200 http://www.freebuf.com\n",
      "Thread-3 200 http://www.sodao.com\n",
      "Thread-3 200 http://www.zhcw.com\n",
      "Thread-3 200 http://www.sh.com\n",
      "Thread-3 200 http://www.ablesky.com\n",
      "Thread-3 200 http://www.microsoftstore.com.cn\n",
      "Thread-3 200 http://www.7k7k.com\n",
      "Thread-3 200 http://www.southmoney.com\n",
      "Thread-3 521 http://www.btc123.com\n",
      "Thread-3 200 http://www.digitaling.com\n",
      "Thread-3 200 http://www.meitu.com\n",
      "Thread-3 200 http://www.chinaaet.com\n",
      "Thread-3 200 http://www.kaoyan.com\n",
      "Thread-3 200 http://www.aipai.com\n",
      "Thread-3 200 http://www.tripadvisor.cn\n",
      "Thread-3 200 http://www.colg.cn\n",
      "Thread-3 200 http://www.admin5.com\n",
      "Thread-3 200 http://www.ncar.cc\n",
      "Thread-3 200 http://www.intel.com\n",
      "Thread-3 200 http://www.wanyx.com\n",
      "Thread-3 200 http://www.chmotor.cn\n",
      "Error:  HTTPConnectionPool(host='www.mxhichina.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000078F1550>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-3 200 http://www.jzb.com\n",
      "Thread-3 200 http://www.it168.com\n",
      "Thread-3 200 http://www.1kkk.com\n",
      "Thread-3 200 http://www.cnodejs.org\n",
      "Thread-3 200 http://www.hudong.com\n",
      "Thread-3 200 http://www.ucweb.com\n",
      "Thread-3 520 http://www.xyw.gov.cn\n",
      "Thread-3 200 http://www.airasiago.com\n",
      "Thread-3 200 http://www.damai.cn\n",
      "Thread-3 200 http://www.farnell.com\n",
      "Thread-3 500 http://www.hi-pda.com\n",
      "Thread-3 200 http://www.wenku1.com\n",
      "Thread-3 200 http://www.haosou.com\n",
      "Thread-3 200 http://www.ishuhui.com\n",
      "Thread-3 200 http://www.paopaoche.net\n",
      "Thread-3 200 http://www.csai.cn\n",
      "Thread-3 200 http://www.zhaoshangbao.com\n",
      "Thread-3 200 http://www.eol.cn\n",
      "Thread-3 200 http://www.excelhome.net\n",
      "Thread-3 200 http://www.missevan.com\n",
      "Thread-3 200 http://www.cncv.org.cn\n",
      "Thread-3 200 http://www.365yg.com\n",
      "Thread-3 200 http://www.huim.com\n",
      "Thread-3 200 http://www.zxxk.com\n",
      "Thread-3 200 http://www.51yes.com\n",
      "Thread-3 200 http://www.cainiao.com\n",
      "Thread-3 200 http://www.nh87.cn\n",
      "Error:  HTTPConnectionPool(host='www.b0yp.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000078C8278>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))\n",
      "Thread-3 200 http://www.qdaily.com\n",
      "Thread-3 200 http://www.kongzhong.com\n",
      "Thread-3 200 http://www.shangc.net\n",
      "Error:  HTTPConnectionPool(host='www.dongqiudi.com', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x0000000007947828>: Failed to establish a new connection: [WinError 10061] 由于目标计算机积极拒绝，无法连接。',))\n",
      "Thread-3 200 http://www.jiankang.com\n",
      "Thread-3 200 http://www.dzsc.com\n",
      "Thread-3 200 http://www.chinaacc.com\n",
      "Thread-3 403 http://www.vcg.com\n",
      "Thread-3 200 http://www.oneplusbbs.com\n",
      "Thread-3 200 http://www.xuetangx.com\n",
      "Thread-3 200 http://www.fz222.com\n",
      "Thread-3 200 http://www.cnwnews.com\n",
      "Thread-3 200 http://www.chinadmd.com\n",
      "Thread-3 200 http://www.b2b168.com\n",
      "Thread-3 200 http://www.pingan.com\n",
      "Thread-3 200 http://www.pushauction.com\n",
      "Thread-3 200 http://www.sdo.com\n",
      "Thread-3 200 http://www.9978.cn\n",
      "Thread-3 200 http://www.ltaaa.com\n",
      "Thread-3 200 http://www.gxyj.com\n",
      "Thread-3 200 http://www.kuaizhan.com\n",
      "Thread-3 200 http://www.airchina.com.cn\n",
      "Thread-3 200 http://www.gcl-power.com\n",
      "Thread-3 200 http://www.medsci.cn\n",
      "Thread-3 200 http://www.lbxcn.com\n",
      "Thread-3 200 http://www.lzgd.com.cn\n",
      "Thread-3 200 http://www.oray.com\n",
      "Thread-3 200 http://www.taobao.org\n",
      "Thread-3 461 http://www.btbtdy.com\n",
      "Thread-3 200 http://www.i2ya.com\n",
      "Thread-3 200 http://www.istar.cn\n",
      "Thread-3 200 http://www.xgo.com.cn\n",
      "Thread-3 200 http://www.66law.cn\n",
      "Thread-3 200 http://www.heiguang.com\n",
      "Thread-3 200 http://www.ao.com\n",
      "Thread-3 200 http://www.jq22.com\n",
      "Thread-3 200 http://www.qidian.com\n",
      "Thread-3 200 http://www.goldcarpet.cn\n",
      "Thread-3 200 http://www.zxbtz.cn\n",
      "Thread-3 200 http://www.jiushang.cn\n",
      "Thread-3 Exiting Main Thread200\n",
      " http://www.cicpa.org.cn\n",
      "Exiting Thread-3\n"
     ]
    }
   ],
   "source": [
    "import threading\n",
    "import requests\n",
    "import time\n",
    "\n",
    "# alexa.txt is tab-separated; the URL is taken from the second column.\n",
    "# Lines without a tab (e.g. a blank trailing line) are skipped instead of raising IndexError.\n",
    "with open('alexa.txt', 'r') as file:\n",
    "    link_list = [line.split('\\t')[1].strip() for line in file if '\\t' in line]\n",
    "\n",
    "THREAD_COUNT = 5\n",
    "\n",
    "class myThread (threading.Thread):\n",
    "    \"\"\"Thread that crawls one contiguous slice of link_list.\"\"\"\n",
    "    def __init__(self, name, link_range):\n",
    "        threading.Thread.__init__(self)\n",
    "        self.name = name\n",
    "        # Inclusive (first_index, last_index) pair into link_list.\n",
    "        self.link_range = link_range\n",
    "    def run(self):\n",
    "        print (\"Starting \" + self.name)\n",
    "        crawler(self.name, self.link_range)\n",
    "        print (\"Exiting \" + self.name)\n",
    "\n",
    "def crawler(threadName, link_range):\n",
    "    \"\"\"Request every URL in the inclusive index range and print its status code.\n",
    "\n",
    "    Request failures are printed rather than raised, so one bad site does not end the thread.\n",
    "    \"\"\"\n",
    "    for i in range(link_range[0], link_range[1]+1):\n",
    "        try:\n",
    "            r = requests.get(link_list[i], timeout=20)\n",
    "            print (threadName, r.status_code, link_list[i])\n",
    "        except Exception as e:\n",
    "            print(threadName, 'Error: ', e)\n",
    "\n",
    "def split_ranges(total, parts):\n",
    "    \"\"\"Split indices 0..total-1 into at most `parts` inclusive (first, last) ranges.\"\"\"\n",
    "    size, extra = divmod(total, parts)\n",
    "    ranges = []\n",
    "    first = 0\n",
    "    for k in range(parts):\n",
    "        # The first `extra` ranges take one additional item so sizes differ by at most one.\n",
    "        count = size + (1 if k < extra else 0)\n",
    "        if count == 0:\n",
    "            break\n",
    "        ranges.append((first, first + count - 1))\n",
    "        first += count\n",
    "    return ranges\n",
    "\n",
    "thread_list = []\n",
    "# Derive the ranges from the real list length. The previous hard-coded ranges ended at\n",
    "# index 1000, which is out of range for a 1000-entry list, and gave the first thread 201 URLs.\n",
    "link_range_list = split_ranges(len(link_list), THREAD_COUNT)\n",
    "\n",
    "start = time.time()\n",
    "# Create and start the threads\n",
    "for i, link_range in enumerate(link_range_list, start=1):\n",
    "    thread = myThread(\"Thread-\" + str(i), link_range)\n",
    "    thread.start()\n",
    "    thread_list.append(thread)\n",
    "\n",
    "# Wait for every thread to finish\n",
    "for thread in thread_list:\n",
    "    thread.join()\n",
    "\n",
    "end = time.time()\n",
    "print ('简单多线程爬虫的总时间为：', end-start)\n",
    "print (\"Exiting Main Thread\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import threading\n",
    "import requests\n",
    "import time\n",
    "import queue as Queue\n",
    "\n",
    "# alexa.txt is tab-separated; the URL is taken from the second column.\n",
    "# Lines without a tab (e.g. a blank trailing line) are skipped instead of raising IndexError.\n",
    "with open('alexa.txt', 'r') as file:\n",
    "    link_list = [line.split('\\t')[1].strip() for line in file if '\\t' in line]\n",
    "\n",
    "start = time.time()\n",
    "class myThread (threading.Thread):\n",
    "    \"\"\"Worker thread that keeps taking URLs from a shared queue until none arrive.\"\"\"\n",
    "    def __init__(self, name, q):\n",
    "        threading.Thread.__init__(self)\n",
    "        self.name = name\n",
    "        self.q = q\n",
    "    def run(self):\n",
    "        print (\"Starting \" + self.name)\n",
    "        while True:\n",
    "            try:\n",
    "                crawler(self.name, self.q)\n",
    "            except Queue.Empty:\n",
    "                # No URL arrived within the get() timeout: treat the work as finished.\n",
    "                # Catching only Queue.Empty keeps unrelated errors visible.\n",
    "                break\n",
    "        print (\"Exiting \" + self.name)\n",
    "\n",
    "def crawler(threadName, q):\n",
    "    \"\"\"Take one URL from q, request it, and print the status code.\n",
    "\n",
    "    Raises Queue.Empty when no URL becomes available within 2 seconds.\n",
    "    Request failures are printed rather than raised.\n",
    "    \"\"\"\n",
    "    url = q.get(timeout=2)\n",
    "    try:\n",
    "        r = requests.get(url, timeout=20)\n",
    "        # Print the URL that was actually fetched (the old code indexed link_list with an undefined i).\n",
    "        print (threadName, r.status_code, url)\n",
    "    except Exception as e:\n",
    "        print(threadName, 'Error: ', e)\n",
    "\n",
    "threadList = [\"Thread-1\", \"Thread-2\", \"Thread-3\",\"Thread-4\", \"Thread-5\"]\n",
    "workQueue = Queue.Queue(1000)\n",
    "threads = []\n",
    "\n",
    "# Create and start the threads. They are started before the queue is filled so that\n",
    "# put() cannot block forever on the bounded queue when there are more URLs than slots.\n",
    "for tName in threadList:\n",
    "    thread = myThread(tName, workQueue)\n",
    "    thread.start()\n",
    "    threads.append(thread)\n",
    "\n",
    "# Fill the queue\n",
    "for url in link_list:\n",
    "    workQueue.put(url)\n",
    "\n",
    "# Wait for every thread to finish\n",
    "for t in threads:\n",
    "    t.join()\n",
    "\n",
    "end = time.time()\n",
    "print ('简单多线程爬虫的总时间为：', end-start)\n",
    "print (\"Exiting Main Thread\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 7.3 多进程爬虫"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4\n"
     ]
    }
   ],
   "source": [
    "from multiprocessing import cpu_count\n",
    "\n",
    "# Show how many CPUs multiprocessing reports for this machine.\n",
    "core_total = cpu_count()\n",
    "print (core_total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.3.1 使用multiprocessing的多进程爬虫"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from multiprocessing import Process, Queue\n",
    "from queue import Empty\n",
    "import time\n",
    "import requests\n",
    "\n",
    "# alexa.txt is tab-separated; the URL is taken from the second column.\n",
    "# Lines without a tab (e.g. a blank trailing line) are skipped instead of raising IndexError.\n",
    "with open('alexa.txt', 'r') as file:\n",
    "    link_list = [line.split('\\t')[1].strip() for line in file if '\\t' in line]\n",
    "\n",
    "PROCESS_COUNT = 3\n",
    "\n",
    "class MyProcess(Process):\n",
    "    \"\"\"Worker process that crawls URLs from a shared queue until it is empty.\"\"\"\n",
    "    def __init__(self, q):\n",
    "        Process.__init__(self)\n",
    "        self.q = q\n",
    "\n",
    "    def run(self):\n",
    "        print (\"Starting \" , self.pid)\n",
    "        while not self.q.empty():\n",
    "            try:\n",
    "                crawler(self.q)\n",
    "            except Empty:\n",
    "                # Another worker took the last URL between empty() and get().\n",
    "                break\n",
    "        print (\"Exiting \" , self.pid)\n",
    "\n",
    "def crawler(q):\n",
    "    \"\"\"Take one URL from q, request it, and print the remaining queue size with the result.\n",
    "\n",
    "    Raises queue.Empty when no URL becomes available within 2 seconds.\n",
    "    Request failures are printed rather than raised.\n",
    "    \"\"\"\n",
    "    url = q.get(timeout=2)\n",
    "    try:\n",
    "        r = requests.get(url, timeout=20)\n",
    "        print (q.qsize(), r.status_code, url)\n",
    "    except Exception as e:\n",
    "        print (q.qsize(), url, 'Error: ', e)\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    start = time.time()\n",
    "    workQueue = Queue(1000)\n",
    "\n",
    "    # Fill the queue\n",
    "    for url in link_list:\n",
    "        workQueue.put(url)\n",
    "\n",
    "    # Start every worker first and only then wait for them. Calling join() right after\n",
    "    # start() inside the same loop made the processes run one after another.\n",
    "    processes = []\n",
    "    for _ in range(PROCESS_COUNT):\n",
    "        p = MyProcess(workQueue)\n",
    "        p.daemon = True\n",
    "        p.start()\n",
    "        processes.append(p)\n",
    "\n",
    "    for p in processes:\n",
    "        p.join()\n",
    "\n",
    "    end = time.time()\n",
    "    print ('Process + Queue多进程爬虫的总时间为：', end-start)\n",
    "    print ('Main process Ended!')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.3.2 使用Pool + Queue的多进程爬虫"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from multiprocessing import Pool, Manager\n",
    "from queue import Empty\n",
    "import time\n",
    "import requests\n",
    "\n",
    "# alexa.txt is tab-separated; the URL is taken from the second column.\n",
    "# Lines without a tab (e.g. a blank trailing line) are skipped instead of raising IndexError.\n",
    "with open('alexa.txt', 'r') as file:\n",
    "    link_list = [line.split('\\t')[1].strip() for line in file if '\\t' in line]\n",
    "\n",
    "PROCESS_COUNT = 3\n",
    "\n",
    "def crawler(q, index):\n",
    "    \"\"\"Crawl URLs from the shared queue q until it is empty.\n",
    "\n",
    "    index is only used to build the 'Process-<index>' label shown in the output.\n",
    "    Request failures are printed rather than raised.\n",
    "    \"\"\"\n",
    "    Process_id = 'Process-' + str(index)\n",
    "    while not q.empty():\n",
    "        try:\n",
    "            url = q.get(timeout=2)\n",
    "        except Empty:\n",
    "            # Another worker took the last URL between empty() and get(). Without this\n",
    "            # handler the exception would be discarded silently by apply_async.\n",
    "            break\n",
    "        try:\n",
    "            r = requests.get(url, timeout=20)\n",
    "            print (Process_id, q.qsize(), r.status_code, url)\n",
    "        except Exception as e:\n",
    "            print (Process_id, q.qsize(), url, 'Error: ', e)\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    start = time.time()\n",
    "    manager = Manager()\n",
    "    workQueue = manager.Queue(1000)\n",
    "\n",
    "    # Fill the queue\n",
    "    for url in link_list:\n",
    "        workQueue.put(url)\n",
    "\n",
    "    # Submit exactly one crawler task per pool process; an extra task would only start\n",
    "    # after another one had already drained the queue.\n",
    "    pool = Pool(processes=PROCESS_COUNT)\n",
    "    for i in range(PROCESS_COUNT):\n",
    "        pool.apply_async(crawler, args=(workQueue, i))\n",
    "\n",
    "    print (\"Started processes\")\n",
    "    pool.close()\n",
    "    pool.join()\n",
    "\n",
    "    end = time.time()\n",
    "    print ('Pool + Queue多进程爬虫的总时间为：', end-start)\n",
    "    print ('Main process Ended!')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 7.4 多协程爬虫"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: gevent in c:\\programdata\\anaconda3\\lib\\site-packages\n",
      "Requirement already satisfied: greenlet>=0.4.10 in c:\\programdata\\anaconda3\\lib\\site-packages (from gevent)\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "\n",
    "# Run pip through the kernel's own interpreter so gevent is installed into the\n",
    "# environment this notebook actually imports from.\n",
    "!\"{sys.executable}\" -m pip install gevent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Monkey-patch before anything else is imported: gevent replaces the blocking\n",
    "# standard-library I/O functions with cooperative ones, and modules imported\n",
    "# before the patch (such as requests) may already hold the unpatched versions.\n",
    "from gevent import monkey\n",
    "monkey.patch_all()  # switch blocking I/O to gevent's asynchronous implementations\n",
    "\n",
    "import gevent\n",
    "from gevent.queue import Queue, Empty\n",
    "import time\n",
    "import requests\n",
    "\n",
    "# Build the list of crawl targets from alexa.txt: one tab-separated record per\n",
    "# line, with the URL taken from the second column.\n",
    "link_list = []\n",
    "with open('alexa.txt', 'r') as file:\n",
    "    file_list = file.readlines()\n",
    "    for eachone in file_list:\n",
    "        fields = eachone.rstrip('\\n').split('\\t')\n",
    "        if len(fields) < 2:\n",
    "            # Blank or malformed line (for example a trailing empty line):\n",
    "            # there is no URL column, so skip it instead of raising IndexError.\n",
    "            continue\n",
    "        link_list.append(fields[1])\n",
    "\n",
    "# Timer for the whole run; read again when the total time is printed.\n",
    "start = time.time()\n",
    "def crawler(index):\n",
    "    \"\"\"Take URLs from the module-level workQueue until it is empty, requesting\n",
    "    each one and printing either its status code or the error it raised.\n",
    "\n",
    "    index: number used only to label this worker's output lines.\n",
    "    \"\"\"\n",
    "    label = 'Process-' + str(index)\n",
    "    while True:\n",
    "        if workQueue.empty():\n",
    "            break\n",
    "        target = workQueue.get(timeout=2)\n",
    "        try:\n",
    "            response = requests.get(target, timeout=20)\n",
    "            print (label, workQueue.qsize(), response.status_code, target)\n",
    "        except Exception as err:\n",
    "            # Report the failure and move on to the next URL.\n",
    "            print (label, workQueue.qsize(), target, 'Error: ', err)\n",
    "\n",
    "def boss():\n",
    "    \"\"\"Put every URL from link_list onto the module-level workQueue without blocking.\"\"\"\n",
    "    for target in link_list:\n",
    "        workQueue.put_nowait(target)\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    # Unbounded on purpose: boss() fills the queue with put_nowait() before any\n",
    "    # crawler runs, and on a bounded queue that call raises Full once the bound\n",
    "    # is reached, which ends boss() and leaves the remaining URLs unqueued.\n",
    "    workQueue = Queue()\n",
    "\n",
    "    # Run the producer to completion first, then start ten crawler greenlets\n",
    "    # and wait for all of them.\n",
    "    gevent.spawn(boss).join()\n",
    "    jobs = [gevent.spawn(crawler, i) for i in range(10)]\n",
    "    gevent.joinall(jobs)\n",
    "\n",
    "    end = time.time()\n",
    "    print ('gevent + Queue多协程爬虫的总时间为：', end-start)\n",
    "    print ('Main Ended!')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 7.5 总结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
